library(tidyverse)
library(tigris)
library(sf)
library(usmap) # May need to install.packages to run this
library(ggplot2)
library(dplyr)
library(forecast)
library(plotly)
Downloading state shape file Data
shape_files = usmap::us_map()
asthma_df = read_csv("data/asthma_data.csv")|>
mutate(year= year_name)
## Rows: 559 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): state
## dbl (2): year_name, prevalence_percent
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
weather_df = read_csv("data/temp_data.csv")
## Rows: 2193 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): state, season
## dbl (2): year, avg_temp
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Merging Asthma & temp & shapefile data
asthma_weather =
asthma_df |>
left_join(weather_df, by = c("state", "year"))
final_df =
shape_files |>
mutate(state = abbr) |>
left_join(asthma_weather, by = "state") |>
drop_na()
Mapping Prevalence data by state:
ggplot=
final_df|>
filter(year==2011)|>
ggplot() +
geom_sf(aes(fill = prevalence_percent), color = "white") +
scale_fill_viridis_c(na.value = "grey90") +
theme_minimal() +
labs(
title = "Asthma Prevalence by State (2011)",
fill = "Prevalence (%)"
) +
theme(
panel.grid = element_blank(),
axis.text = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank()
)
final_df|>
filter(year==2012)|>
ggplot() +
geom_sf(aes(fill = prevalence_percent), color = "white") +
scale_fill_viridis_c(na.value = "grey90") +
theme_minimal() +
labs(
title = "Asthma Prevalence by State (2012)",
fill = "Prevalence (%)"
) +
theme(
panel.grid = element_blank(),
axis.text = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank()
)
final_df|>
filter(year==2013)|>
ggplot() +
geom_sf(aes(fill = prevalence_percent), color = "white") +
scale_fill_viridis_c(na.value = "grey90") +
theme_minimal() +
labs(
title = "Asthma Prevalence by State (2013)",
fill = "Prevalence (%)"
) +
theme(
panel.grid = element_blank(),
axis.text = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank()
)
plotly_map <- ggplotly(ggplot)
# Display the Plotly map
plotly_map
Do regions with higher asthma prevalence overlap with areas of lower average temperature?
Does analyzing prevalence data stratified by time and temperature make a difference?
final_df=
final_df |>
mutate(
temp_category = case_when(
avg_temp < 0 ~ "Cold",
avg_temp >= 0 & avg_temp < 15 ~ "Mild",
avg_temp >= 15 ~ "Warm"))|>
select(-c("abbr","fips", "year_name"))|>
select(state,prevalence_percent,avg_temp, season,year,temp_category,everything())
final_df
## Simple feature collection with 2185 features and 7 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -2590847 ymin: -2608148 xmax: 2523581 ymax: 731407.9
## Projected CRS: NAD27 / US National Atlas Equal Area
## # A tibble: 2,185 × 8
## state prevalence_percent avg_temp season year temp_category full
## <chr> <dbl> <dbl> <chr> <dbl> <chr> <chr>
## 1 AK 8.2 1.83 Fall 2011 Mild Alaska
## 2 AK 8.2 2.12 Spring 2011 Mild Alaska
## 3 AK 8.2 11.9 Summer 2011 Mild Alaska
## 4 AK 8.2 -5.34 Winter 2011 Cold Alaska
## 5 AK 9 1.17 Fall 2012 Mild Alaska
## 6 AK 9 1.49 Spring 2012 Mild Alaska
## 7 AK 9 11.8 Summer 2012 Mild Alaska
## 8 AK 9 -8.34 Winter 2012 Cold Alaska
## 9 AK 9.3 3.55 Fall 2013 Mild Alaska
## 10 AK 9.3 0.749 Spring 2013 Mild Alaska
## # ℹ 2,175 more rows
## # ℹ 1 more variable: geom <MULTIPOLYGON [m]>
Does Correlation differ by state?
cor_results=
final_df |>
group_by(state) |>
summarise(
cor_test = list(cor.test(avg_temp, prevalence_percent)),
.groups = "drop")|>
rowwise()|>
mutate(
corr=cor_test[["estimate"]],
p_value= cor_test[["p.value"]])|>
ungroup()|>
select(-cor_test)
cor_results
## Simple feature collection with 50 features and 3 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -2590847 ymin: -2608148 xmax: 2523581 ymax: 731407.9
## Projected CRS: NAD27 / US National Atlas Equal Area
## # A tibble: 50 × 4
## state geom corr p_value
## <chr> <MULTIPOLYGON [m]> <dbl> <dbl>
## 1 AK (((-2396847 -2547721, -2393297 -2546391, -2391552 -25… 0.0578 0.709
## 2 AL (((1093777 -1378535, 1093269 -1374223, 1092965 -13414… 0.0106 0.945
## 3 AR (((483065.2 -927788.2, 506062 -926263.3, 531512.5 -92… 0.0142 0.927
## 4 AZ (((-1388676 -1254584, -1389181 -1251856, -1384522 -12… 0.0613 0.693
## 5 CA (((-1719946 -1090033, -1709611 -1090026, -1700882 -11… -0.00830 0.957
## 6 CO (((-789538.7 -678773.8, -789538.2 -678769.5, -781696.… -0.0228 0.883
## 7 CT (((2161733 -83737.52, 2177182 -65221.22, 2168999 -581… 0.0308 0.843
## 8 DE (((2042506 -284367.3, 2043078 -280000.3, 2044959 -275… 0.0219 0.888
## 9 FL (((1855611 -2064809, 1860157 -2054372, 1867238 -20477… -0.0717 0.660
## 10 GA (((1311336 -999180.8, 1323127 -997255.3, 1331180 -995… -0.00238 0.988
## # ℹ 40 more rows
Correlation is small across all states
Asthma Trend Across the Us over time:
aggregated_data=
final_df|>
group_by(year)|>
summarise(avg_prevalence = mean(prevalence_percent, na.rm = TRUE))
ts_data= ts(aggregated_data[["avg_prevalence"]], start = min(aggregated_data[["year"]]), frequency = 1)
plot.ts(ts_data,
main = "Asthma Trend Across the US Over Time",
ylab = "Average Asthma Prevalence (%)",
xlab = "Year")
Does the effect of prevalence differ by state:
state_trends=
final_df |>
group_by(state) |>
summarise(
model = list(lm(prevalence_percent ~ year)))
state_trends=
state_trends |>
mutate(
model_summary = map(model, broom::tidy)) |>
unnest(model_summary)|>
filter(term == "year")|>
select(
state,
slope = estimate,
intercept = NULL,
p_value = p.value)|>
st_drop_geometry()|>
knitr::kable(digits=5)
state_trends
| state | slope | p_value |
|---|---|---|
| AK | 0.06182 | 0.00868 |
| AL | 0.16455 | 0.00001 |
| AR | 0.02909 | 0.25949 |
| AZ | 0.06091 | 0.00049 |
| CA | 0.02636 | 0.29868 |
| CO | 0.16364 | 0.00000 |
| CT | 0.09364 | 0.00000 |
| DE | 0.04727 | 0.16551 |
| FL | -0.05152 | 0.09681 |
| GA | 0.00455 | 0.86590 |
| HI | -0.06182 | 0.04633 |
| IA | 0.08273 | 0.00179 |
| ID | 0.10000 | 0.00000 |
| IL | 0.03455 | 0.07010 |
| IN | 0.02182 | 0.28358 |
| KS | 0.18364 | 0.00000 |
| KY | 0.05545 | 0.17836 |
| LA | 0.19909 | 0.00000 |
| MA | -0.00909 | 0.76559 |
| MD | 0.04636 | 0.00314 |
| ME | 0.01455 | 0.59673 |
| MI | 0.09091 | 0.00002 |
| MN | 0.11364 | 0.00000 |
| MO | -0.05727 | 0.01216 |
| MS | 0.22636 | 0.00000 |
| MT | 0.11455 | 0.00003 |
| NC | 0.03455 | 0.19492 |
| ND | 0.03182 | 0.09917 |
| NE | 0.10545 | 0.00000 |
| NH | 0.14909 | 0.00023 |
| NJ | -0.00400 | 0.87883 |
| NM | 0.04292 | 0.17131 |
| NV | 0.19000 | 0.00000 |
| NY | -0.01636 | 0.41568 |
| OH | 0.01727 | 0.47857 |
| OK | 0.12545 | 0.00000 |
| OR | 0.05091 | 0.01235 |
| PA | 0.10091 | 0.00000 |
| RI | 0.09364 | 0.00184 |
| SC | 0.12182 | 0.00000 |
| SD | 0.09909 | 0.00123 |
| TN | 0.30727 | 0.00000 |
| TX | 0.07182 | 0.00022 |
| UT | 0.14455 | 0.00000 |
| VA | 0.05636 | 0.00440 |
| VT | 0.05636 | 0.01882 |
| WA | 0.04818 | 0.00625 |
| WI | 0.10182 | 0.00312 |
| WV | 0.32091 | 0.00000 |
| WY | 0.09182 | 0.00036 |